Import Libraries and Data
## code to display the plotly graphs in github
import plotly.io as pio
from IPython.display import HTML
# Select the "notebook" renderer as plotly's default for every figure shown below.
pio.renderers.default = "notebook"
# Emit a <script> tag that loads plotly.js from the CDN; per the header comment of
# this cell, the intent is to make the figures display when viewed on GitHub.
# NOTE(review): the `plotly-latest` CDN alias is reportedly frozen at an old
# plotly.js 1.x build — confirm, and consider pinning an explicit version.
HTML('''
<script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
''')
import os
from helper_funcs import get_tokens,get_headers,fetch_top_posts,fetch_last_posts,plotly_graphs
import pandas as pd
import numpy as np
import ast
from sklearn.preprocessing import MultiLabelBinarizer
from xgboost import XGBClassifier
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix,classification_report
import plotly.graph_objects as go
# Credentials are read from the environment. They are not referenced again in
# this section — presumably the helper module consumes them; confirm.
PRODUCT_HUNT_API_KEY = os.environ.get("PRODUCT_HUNT_API_KEY")
PRODUCT_HUNT_API_SECRET = os.environ.get("PRODUCT_HUNT_API_SECRET")

# Build the request headers from the token pair returned by the helper.
token, token_type = get_tokens()
headers = get_headers(token, token_type)

# Query window: the whole of August 2024 ("Z" marks the timestamps as UTC).
start_date = "2024-08-01T00:00:00Z"
end_date = "2024-08-31T23:59:59Z"

# Both fetches share the same window, headers and limit.
fetch_kwargs = {
    "start_date": start_date,
    "end_date": end_date,
    "headers": headers,
    "limit": 100,
}
top_posts = fetch_top_posts(**fetch_kwargs)
last_posts = fetch_last_posts(**fetch_kwargs)
# Peek at the second top post: its name, description and first comment.
top_sample = top_posts[1]['node']
print(
    f"Name :{top_sample['name']},\n"
    f"Description:{top_sample['description']},\n"
    f"First comment:{top_sample['comments']['nodes'][0]['body']}"
)
Name :Me.bot, Description:Me.bot captures and connects your thoughts to understand you better, synthesizing a coach for all your life challenges, from a big career move to a small gloomy moment., First comment:Hello, I’m Felix Tao, the CEO of Mindverse. Let me introduce <b>Me.bot</b> to you. It is an app I use everyday, for several hours! Surrounded by centralized AI models, we believe in exploring a path where everyone can <b>train their own personal AI</b>. Everyone deserves an AI defined by them, not by a "Big Brother." <b>Our solution</b> Our product, Me.bot, is designed to be a <b>personalized AI companion</b>. It learns and evolves with you, coaching you based on your unique experiences and interactions. You can easily build your memory archive with Me.bot all-compassing multimodal recognition, and Me.bot will connect the dots of your memories to inspire and support you. <b>Key features</b> 🌟<b>Serendipity</b>: Me.bot learns from you, offering inspiration and advice when you need it most. 🧠<b>Second Brain</b>: Me.bot helps you understand yourself better and presents its insights into you. 💬<b>Speak to Remind</b>: Set reminders with your voice—it's easier than ever. 📁<b>Smart Topics</b>: Automatically organized AI folders keep your archives tidy and easy to navigate. We're already seeing Me.bot make a difference. Some users have told us that Me.bot suggested a LinkedIn connection that led to a <b>job opportunity</b>. Others shared that, thanks to Me.bot, they decided to pursue a degree in sociology at the age of 35. Looking ahead, we plan to introduce features like shared memories and a bot community where your personal Me.bot can interact with others. <b>Finally, kudos to @chrismessina who's supported our launch once again!</b> By the way, sign up today to enjoy a <b>30-day reward</b> by completing your new user task!
# Peek at the second least-voted post. Low-vote launches can have no comments at
# all (their 'nodes' list is empty), so fall back to None instead of letting
# nodes[0] raise IndexError.
last_sample = last_posts[1]['node']
last_sample_comments = last_sample['comments']['nodes']
last_sample_first_comment = (
    last_sample_comments[0]['body'] if last_sample_comments else None
)
print(
    f"Name :{last_sample['name']},\n"
    f"Description:{last_sample['description']},\n"
    f"First comment:{last_sample_first_comment}"
)
Name :Business Digitaly, Description:Elevate your online presence with expert SEO, Google Ads, web development, and marketing services. Maximize ROI with BusinessDigitaly today!, First comment:Excited to announce the launch of BusinessDigitaly on Product Hunt! 🎉 As one of the top digital marketing agencies in the USA, we're dedicated to helping businesses grow through innovative digital strategies. 🚀 Whether you're looking for the best digital marketing company in the USA or need expert guidance on your digital journey, we've got you covered. Check out our product, and let's get the conversation started! We’d love to hear your thoughts and answer any questions you might have.
EDA - Top Posts of August
# One row per post: unwrap the 'node' payload of every fetched entry.
top_nodes = [post['node'] for post in top_posts]
top_posts_df = pd.DataFrame(top_nodes)
top_posts_df.head()
| name | description | url | votesCount | createdAt | tagline | commentsCount | comments | topics | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | Wordware (YC S24) | Wordware is an IDE that enables anyone to buil... | https://www.producthunt.com/posts/wordware-yc-... | 7465 | 2024-08-02T07:01:00Z | Your tool for building AI agents with natural ... | 165 | {'nodes': [{'body': '👋🏻 Hi Product Hunt makers... | {'nodes': [{'slug': 'software-engineering'}, {... |
| 1 | Me.bot | Me.bot captures and connects your thoughts to ... | https://www.producthunt.com/posts/me-bot-2?utm... | 2664 | 2024-08-06T07:01:00Z | The inspiring companion for your life | 418 | {'nodes': [{'body': 'Hello, I’m Felix Tao, the... | {'nodes': [{'slug': 'productivity'}, {'slug': ... |
| 2 | 10xlaunch | Just add our one-line script to your website a... | https://www.producthunt.com/posts/10xlaunch?ut... | 1573 | 2024-08-27T07:01:00Z | Get 10x more users from same website traffic | 205 | {'nodes': [{'body': 'Hey fam, Mo here from 10... | {'nodes': [{'slug': 'sales'}, {'slug': 'artifi... |
| 3 | Flowith | Flowith is the AI for deep work. Surpassing tr... | https://www.producthunt.com/posts/flowith?utm_... | 1476 | 2024-08-07T07:01:00Z | AI for deep work | 108 | {'nodes': [{'body': 'Hey Product Hunt communit... | {'nodes': [{'slug': 'productivity'}, {'slug': ... |
| 4 | Brainybear.ai | Build AI Chatbots in 3 Steps and Train in 3 Cl... | https://www.producthunt.com/posts/brainybear-a... | 1370 | 2024-08-06T07:01:00Z | Train AI chatbots in 3 clicks and help custome... | 105 | {'nodes': [{'body': 'Hey Hunters, I'm thrille... | {'nodes': [{'slug': 'messaging'}, {'slug': 'ar... |
# Parse the ISO timestamps once, then derive the calendar date and weekday name.
top_created_at = pd.to_datetime(top_posts_df['createdAt'])
top_posts_df['date'] = top_created_at.dt.date
top_posts_df['day'] = top_created_at.dt.day_name()

# Flatten each post's topic nodes into a plain list of slugs.
top_posts_df['topic_list'] = top_posts_df['topics'].apply(
    lambda topics: [topic['slug'] for topic in topics['nodes']]
)

plotly_graphs('histogram', top_posts_df['day'], title='Day of Launch')
Among the top-performing posts of August, the common trend is to launch on weekdays.
# The same x/y data is drawn twice: first as a scatter, then as a box plot.
launch_days = top_posts_df['day']
launch_votes = top_posts_df['votesCount']
votes_vs_day_title = 'Vote Count compared to day of Launch'
plotly_graphs('scatter', x=launch_days, y=launch_votes, title=votes_vs_day_title)
plotly_graphs('box', x=launch_days, y=launch_votes, title=votes_vs_day_title)
# Mean votes per launch weekday, sorted high to low; computed once and used for
# both the category axis (its index) and the bar heights (its values).
mean_votes_by_day = (
    top_posts_df.groupby('day')['votesCount']
    .mean()
    .sort_values(ascending=False)
    .round(0)
)
plotly_graphs(
    'bar',
    x=mean_votes_by_day.index,
    y=mean_votes_by_day,
    title='Average Vote Count vs Day of Launch',
)
top_posts_df.groupby('day')['votesCount'].mean().round(0).sort_values(ascending=False)
day Friday 1191.0 Tuesday 877.0 Wednesday 778.0 Monday 734.0 Thursday 691.0 Saturday 640.0 Sunday 590.0 Name: votesCount, dtype: float64
# Same per-weekday average, but ignoring any post with 7000 or more votes (in the
# preview table above that is only Wordware, at 7465) so it cannot skew the mean.
posts_below_7000 = top_posts_df[top_posts_df['votesCount'] < 7000]
posts_below_7000.groupby('day')['votesCount'].mean().round(0).sort_values(ascending=False)
day Tuesday 877.0 Wednesday 778.0 Monday 734.0 Thursday 691.0 Friday 668.0 Saturday 640.0 Sunday 590.0 Name: votesCount, dtype: float64
top_posts_df.groupby('day')['commentsCount'].mean().round(0).sort_values(ascending=False)
day Tuesday 187.0 Wednesday 174.0 Monday 172.0 Thursday 172.0 Friday 156.0 Saturday 141.0 Sunday 126.0 Name: commentsCount, dtype: float64
# Mean comment count per launch weekday, sorted high to low; computed once and
# reused for both axes of the bar chart.
mean_comments_by_day = (
    top_posts_df.groupby('day')['commentsCount']
    .mean()
    .sort_values(ascending=False)
    .round(0)
)
plotly_graphs(
    'bar',
    x=mean_comments_by_day.index,
    y=mean_comments_by_day,
    title='Average comments Count vs Day of Launch',
)
A few trends observed:
- The average number of votes gained seems to be higher on weekdays than on Saturdays and Sundays.
- It is also worth noting that the launch of a single startup heavily skewed the average-votes data (compare the Friday average with and without the 7,000+ vote launch above).
One of the reasons behind this trend may be that people like to enjoy their weekends and are not that active on Product Hunt.
Also, from observation, launches that do well in the early hours or on the first day typically tend to do well throughout. One way to test whether this observation holds is to check the correlation between the votes gained within 24 hours of launch and the total votes gained. However, this is not possible, because Product Hunt's GraphQL API currently provides only the total vote count.
# Ten most frequent topic slugs among the top posts. explode() yields one row per
# (post, topic) pair, so value_counts() counts posts per topic.
top_topic_counts = (
    top_posts_df.explode('topic_list')['topic_list'].value_counts().head(10)
)
plotly_graphs(
    'bar',
    x=top_topic_counts.index,
    y=top_topic_counts,
    title='Topics of Top 100 launches of August',
)
As expected, most of the top launches in August (64 of 100) are related to the AI domain.
I have been following Product Hunt for a while, and one thing that almost always stands out in top-performing posts is the first comment from the maker.
EDA II
Let us take a look at the difference between launches that are voted the most and voted the least in the month of August
Here are a few things we can take a look at:
- The description of the product/service
- First comment by the maker: which provides a detailed overview of how the product works and other details
We can use the description to train a classification model to determine whether a launch will be successful based on the number of votes it gets
Although the product itself matters the most when it comes to the votes it gets, how the product is marketed also matters.
# Print the first five top-post descriptions, each followed by a blank line.
for description in top_posts_df['description'].head(5):
    print(description)
    print()
Wordware is an IDE that enables anyone to build complex AI Agents and applications. Domain experts and engineers can now iterate 20x faster with prebuilt tools, API deployment, tracing, and more. Finally, build high-quality and reliable AI! Me.bot captures and connects your thoughts to understand you better, synthesizing a coach for all your life challenges, from a big career move to a small gloomy moment. Just add our one-line script to your website and know exactly who's visiting your website – get names, emails, and LinkedIn profiles of your anonymous website visitors automatically as soon as they land on your website. Flowith is the AI for deep work. Surpassing traditional chat-based tools, it streamlines tasks on a multi-thread interface powered by a most advanced agent framework. The intuitive canvas and smart framework boost productivity, helping users stay in the flow. Build AI Chatbots in 3 Steps and Train in 3 Clicks. Brainybear scans your website or uploaded files to deliver quick, accurate AI answers to customer queries.
# Iterate last_posts itself rather than indexing it with range(len(top_posts)):
# the two lists are fetched independently, so a shorter last_posts would raise
# IndexError and a longer one would silently lose rows.
last_posts_df = pd.DataFrame([post['node'] for post in last_posts])

# Parse the ISO timestamps once, then derive the calendar date and weekday name.
last_created_at = pd.to_datetime(last_posts_df['createdAt'])
last_posts_df['date'] = last_created_at.dt.date
last_posts_df['day'] = last_created_at.dt.day_name()

# Flatten each post's topic nodes into a plain list of slugs.
last_posts_df['topic_list'] = last_posts_df['topics'].apply(
    lambda topics: [topic['slug'] for topic in topics['nodes']]
)
last_posts_df.head()
| name | description | url | votesCount | createdAt | tagline | commentsCount | comments | topics | date | day | topic_list | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | MPS - Major Professional Services | Major Professional Services (MPS) offers fixed... | https://www.producthunt.com/posts/mps-major-pr... | 1 | 2024-08-06T17:57:11Z | Unlock Your Financial Potential | 0 | {'nodes': []} | {'nodes': [{'slug': 'fintech'}, {'slug': 'inve... | 2024-08-06 | Tuesday | [fintech, investing, money] |
| 1 | Business Digitaly | Elevate your online presence with expert SEO, ... | https://www.producthunt.com/posts/business-dig... | 1 | 2024-08-06T18:01:00Z | Digital Marketing Agency in USA | 1 | {'nodes': [{'body': 'Excited to announce the l... | {'nodes': [{'slug': 'marketing'}, {'slug': 'se... | 2024-08-06 | Tuesday | [marketing, seo, web-design] |
| 2 | Modern Brick Haus | Discover Bonsai Tree DIY brick build kits at M... | https://www.producthunt.com/posts/modern-brick... | 1 | 2024-08-07T13:07:55Z | Discover The Bonsai Tree - Build Kits with Mod... | 0 | {'nodes': []} | {'nodes': [{'slug': 'home'}, {'slug': 'craftin... | 2024-08-07 | Wednesday | [home, crafting, diy] |
| 3 | Tech Leads IT | Are you seeking comprehensive Oracle Fusion SC... | https://www.producthunt.com/posts/tech-leads-i... | 1 | 2024-08-06T05:58:46Z | Oracle Fusion SCM Online Training | 1 | {'nodes': [{'body': 'Are you seeking comprehen... | {'nodes': [{'slug': 'education'}, {'slug': 'on... | 2024-08-06 | Tuesday | [education, online-learning, career] |
| 4 | MainStreet E-commerce | Highly customizable e-commerce software for bu... | https://www.producthunt.com/posts/mainstreet-e... | 1 | 2024-08-06T18:16:47Z | MainStreet | 1 | {'nodes': [{'body': 'Feature rich out of the b... | {'nodes': [{'slug': 'saas'}, {'slug': 'e-comme... | 2024-08-06 | Tuesday | [saas, e-commerce, business] |
# Print the first five least-voted descriptions, each followed by a blank line.
for description in last_posts_df['description'].head(5):
    print(description)
    print()
Major Professional Services (MPS) offers fixed income investing, bonds, private debt investments, and alternative fixed income products for institutional, wholesale, and retail investors. Elevate your online presence with expert SEO, Google Ads, web development, and marketing services. Maximize ROI with BusinessDigitaly today! Discover Bonsai Tree DIY brick build kits at Modern Brick Haus. You can easily create a miniature masterpiece to add greenery to your modern home. Buy Now! Are you seeking comprehensive Oracle Fusion SCM Online Training to elevate your expertise in supply chain management? Look no further than Tech Leads IT, a leading institute renowned for its top-notch training programs. Enroll now Highly customizable e-commerce software for businesses of medium and large sizes that your operation team will actually love.
top_posts_df.iloc[0]['comments']['nodes'][0]['body']
'👋🏻 Hi Product Hunt makers!\n\nI’m Kamil, Head of Growth at Wordware—an IDE for building AI agents. Today, we’re officially launching the Wordware platform, and we’re excited to show the world what we built.\n\nIt’s a tool (an IDE) that enables you to quickly build custom AI agents for specific use cases like legal contract generation, marketing content automation, invoice analysis, candidate screening, generating PRDs, and many more. We call applications built on Wordware ‘WordApps’ because you can create them using natural language—in other words, using words (pun intended).\n\nOur core belief is that the domain expert—not the engineer—knows what good LLM output looks like. For example, lawyers building legal SaaS need to be deeply involved in the process, and working directly in the codebase or going back-and-forth with engineers isn’t the way to go.\n\nMost of our clients are cross-functional teams, including less technical members, who need to collaborate with engineers on LLM applications, such as assessing prompt outputs, and care about the speed of iterations. \n\nThese include early-stage startups building AI-first solutions that treat Wordware as their LLM backend, larger corporations that build 100s of prototypes on us, and AI builders creating their own products. 
From venture partners aiming to be in the top 0.1% of their field by building AI agents to analyze startups and founders, to lawyers scaling their expertise through AI, Wordware is the go-to tool.\n\nAnd if you’re technical, then you’ll appreciate Wordware for the speed of building complex AI agents without messy LLM abstractions, as well as our advanced capabilities like loops, conditional logic (IF-Else), structured generation (JSON mode), and custom code execution, allowing you to connect to virtually any API.\n\nNote: you must not be afraid of no/low-code tools and simply accept it’s 10x faster than writing everything yourself—not to mention the annoying process of iterations on prompts in the codebase 😉\n\nHere’s what you can do with Wordware - example use cases:\n\n✍🏻 Custom content generation AI agent: Researches topics and writes SEO-optimized blog posts using online-enabled models and looping through table of contents. It helped created fully automated content execution for many teams.\n🧠 Invoice processing: Analyzes and processes unstructured invoices efficiently (up to 25k daily by one of our clients!), categorizing them and giving insights on financial data. Not only that, based on the data on the invoices it gives personalized recommendations on where to find cheaper alternatives based on geolocalization.\n📊 Data querying and reporting: Performs data analysis through natural language commands, queries databases and constructs personalized reports. Users don’t have to be SQL fluent to get information, it can query big databases and save over 10h of work per week.\n📚 Personalized learning and assessment agent: Prepares learning material personalized to the user’s level and needs by searching Wikipedia or research papers, then prepares quizzes and assesses user answers automatically.\n✨ Sales enrichment: Searches LinkedIn and the web for information on leads, then updates and enriches your CRM with relevant data. 
Using different sources we can make sure the provided data is relevant and not hallucinated, making personalized outbound more effective.\n💬 Meeting summaries: Provides personal voice summaries of all your weekly meetings in a structured manner. Not only that, it also categorizes them by meeting type so you never get lost in the notes.\n\nPS: Wordware is the platform behind Audioscribe (<a href="https://audioscribe.wordware.ai" target="_blank" rel="nofollow noopener noreferrer">https://audioscribe.wordware.ai</a>) and Twitter Personality (<a href="https://twitter.wordware.ai" target="_blank" rel="nofollow noopener noreferrer">https://twitter.wordware.ai</a>) projects, our open-source projects that we built to showcase the possibilities of Wordware.\n\nPS2: our Twitter Personality app gained 1 million users in the last 4 days - it’s a great showcase of what prompting and building an LLM backend on Wordware can achieve.'
print(top_posts_df.iloc[0]['comments']['nodes'][0]['body'])
👋🏻 Hi Product Hunt makers! I’m Kamil, Head of Growth at Wordware—an IDE for building AI agents. Today, we’re officially launching the Wordware platform, and we’re excited to show the world what we built. It’s a tool (an IDE) that enables you to quickly build custom AI agents for specific use cases like legal contract generation, marketing content automation, invoice analysis, candidate screening, generating PRDs, and many more. We call applications built on Wordware ‘WordApps’ because you can create them using natural language—in other words, using words (pun intended). Our core belief is that the domain expert—not the engineer—knows what good LLM output looks like. For example, lawyers building legal SaaS need to be deeply involved in the process, and working directly in the codebase or going back-and-forth with engineers isn’t the way to go. Most of our clients are cross-functional teams, including less technical members, who need to collaborate with engineers on LLM applications, such as assessing prompt outputs, and care about the speed of iterations. These include early-stage startups building AI-first solutions that treat Wordware as their LLM backend, larger corporations that build 100s of prototypes on us, and AI builders creating their own products. From venture partners aiming to be in the top 0.1% of their field by building AI agents to analyze startups and founders, to lawyers scaling their expertise through AI, Wordware is the go-to tool. And if you’re technical, then you’ll appreciate Wordware for the speed of building complex AI agents without messy LLM abstractions, as well as our advanced capabilities like loops, conditional logic (IF-Else), structured generation (JSON mode), and custom code execution, allowing you to connect to virtually any API. 
Note: you must not be afraid of no/low-code tools and simply accept it’s 10x faster than writing everything yourself—not to mention the annoying process of iterations on prompts in the codebase 😉 Here’s what you can do with Wordware - example use cases: ✍🏻 Custom content generation AI agent: Researches topics and writes SEO-optimized blog posts using online-enabled models and looping through table of contents. It helped created fully automated content execution for many teams. 🧠 Invoice processing: Analyzes and processes unstructured invoices efficiently (up to 25k daily by one of our clients!), categorizing them and giving insights on financial data. Not only that, based on the data on the invoices it gives personalized recommendations on where to find cheaper alternatives based on geolocalization. 📊 Data querying and reporting: Performs data analysis through natural language commands, queries databases and constructs personalized reports. Users don’t have to be SQL fluent to get information, it can query big databases and save over 10h of work per week. 📚 Personalized learning and assessment agent: Prepares learning material personalized to the user’s level and needs by searching Wikipedia or research papers, then prepares quizzes and assesses user answers automatically. ✨ Sales enrichment: Searches LinkedIn and the web for information on leads, then updates and enriches your CRM with relevant data. Using different sources we can make sure the provided data is relevant and not hallucinated, making personalized outbound more effective. 💬 Meeting summaries: Provides personal voice summaries of all your weekly meetings in a structured manner. Not only that, it also categorizes them by meeting type so you never get lost in the notes. 
PS: Wordware is the platform behind Audioscribe (<a href="https://audioscribe.wordware.ai" target="_blank" rel="nofollow noopener noreferrer">https://audioscribe.wordware.ai</a>) and Twitter Personality (<a href="https://twitter.wordware.ai" target="_blank" rel="nofollow noopener noreferrer">https://twitter.wordware.ai</a>) projects, our open-source projects that we built to showcase the possibilities of Wordware. PS2: our Twitter Personality app gained 1 million users in the last 4 days - it’s a great showcase of what prompting and building an LLM backend on Wordware can achieve.
print(top_posts_df.iloc[1]['comments']['nodes'][0]['body'])
Hello, I’m Felix Tao, the CEO of Mindverse. Let me introduce <b>Me.bot</b> to you. It is an app I use everyday, for several hours! Surrounded by centralized AI models, we believe in exploring a path where everyone can <b>train their own personal AI</b>. Everyone deserves an AI defined by them, not by a "Big Brother." <b>Our solution</b> Our product, Me.bot, is designed to be a <b>personalized AI companion</b>. It learns and evolves with you, coaching you based on your unique experiences and interactions. You can easily build your memory archive with Me.bot all-compassing multimodal recognition, and Me.bot will connect the dots of your memories to inspire and support you. <b>Key features</b> 🌟<b>Serendipity</b>: Me.bot learns from you, offering inspiration and advice when you need it most. 🧠<b>Second Brain</b>: Me.bot helps you understand yourself better and presents its insights into you. 💬<b>Speak to Remind</b>: Set reminders with your voice—it's easier than ever. 📁<b>Smart Topics</b>: Automatically organized AI folders keep your archives tidy and easy to navigate. We're already seeing Me.bot make a difference. Some users have told us that Me.bot suggested a LinkedIn connection that led to a <b>job opportunity</b>. Others shared that, thanks to Me.bot, they decided to pursue a degree in sociology at the age of 35. Looking ahead, we plan to introduce features like shared memories and a bot community where your personal Me.bot can interact with others. <b>Finally, kudos to @chrismessina who's supported our launch once again!</b> By the way, sign up today to enjoy a <b>30-day reward</b> by completing your new user task!
# First comment body of the row at position 12 of the least-voted frame, shown
# inside a one-element list; None when that post has no comments.
comments_of_row_12 = last_posts_df.iloc[12]['comments']['nodes']
[comments_of_row_12[0]['body'] if comments_of_row_12 else None]
[None]
def _first_comment_len(post):
    """Return the character length of a post's first comment, or 0 if it has none.

    Args:
        post: One fetched entry; its comments are read from
            post['node']['comments']['nodes'].

    Returns:
        len() of the first comment's 'body', or 0 when the comment list is empty.
    """
    nodes = post['node']['comments']['nodes']
    return len(nodes[0]['body']) if nodes else 0


# Both lists go through the same guarded helper, so a top post without any
# comments yields 0 instead of raising IndexError on nodes[0].
top_first_comments_len = [_first_comment_len(post) for post in top_posts]
last_first_comments_len = [_first_comment_len(post) for post in last_posts]
# Distribution of first-comment lengths for the top posts ...
plotly_graphs('histogram', x=top_first_comments_len)
# ... and their arithmetic mean.
np.mean(top_first_comments_len)
1595.18
plotly_graphs('histogram',x=last_first_comments_len)
As we can see from a couple of examples of both types of posts, the first comment from the maker is very detailed in the top-performing launches, whereas in the launches with the fewest votes the first comment is not very detailed, and sometimes there is no comment from the maker at all.
Classification Model
# Load the saved posts from CSV. The file's unlabelled first column shows up as
# 'Unnamed: 0' in the preview below and is dropped a few cells further down.
df = pd.read_csv('PH_posts_data.csv')
df.head()
| Unnamed: 0 | name | description | url | votesCount | createdAt | tagline | commentsCount | comments | topics | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | Not Diamond | Not Diamond isn’t like other chatbots you’ve u... | https://www.producthunt.com/posts/not-diamond?... | 691 | 2024-08-01T07:01:00Z | The last chatbot you’ll ever need | 219 | {'nodes': [{'body': 'Hey Product Hunt!\r\n\r\n... | {'nodes': [{'slug': 'developer-tools'}, {'slug... |
| 1 | 1 | Clarity | Clarity is purpose-built for founder-led sales... | https://www.producthunt.com/posts/clarity-9f37... | 545 | 2024-08-01T07:01:00Z | A meeting recorder for founder-led sales | 199 | {'nodes': [{'body': "Congrats on the launch Au... | {'nodes': [{'slug': 'productivity'}, {'slug': ... |
| 2 | 2 | Mito Health | Mito Health uses blood work at regular labs to... | https://www.producthunt.com/posts/mito-health?... | 358 | 2024-08-01T07:01:00Z | Better insights from bloodwork | 117 | {'nodes': [{'body': 'Hi Product Hunt! 👋 \n\nWe... | {'nodes': [{'slug': 'health-fitness'}, {'slug'... |
| 3 | 3 | EduWiz.AI | Improve your writing effortlessly with EduWiz.... | https://www.producthunt.com/posts/eduwiz-ai?ut... | 292 | 2024-08-01T07:01:00Z | Write magical paperwork in seconds with AI | 105 | {'nodes': [{'body': 'Hey, Product Hunt! 👋\n\nI... | {'nodes': [{'slug': 'writing'}, {'slug': 'educ... |
| 4 | 4 | Mind Visuals | Stay in your creator zone and edit videos in s... | https://www.producthunt.com/posts/mind-visuals... | 278 | 2024-08-01T07:01:00Z | Drag and drop animations for creators | 61 | {'nodes': [{'body': 'Mind Visuals is now live!... | {'nodes': [{'slug': 'design-tools'}, {'slug': ... |
# Discard the unlabelled column that came in from the CSV.
df = df.drop(columns='Unnamed: 0')

# Parse the ISO timestamps once; keep the calendar date and the weekday name.
created_at = pd.to_datetime(df['createdAt'])
df['Date'] = created_at.dt.date
df['day'] = created_at.dt.day_name()


def _topic_slugs(raw_topics):
    """Parse the stringified topics dict of one row and return its slugs as a list."""
    return [node['slug'] for node in ast.literal_eval(raw_topics)['nodes']]


df['topic_list'] = df['topics'].apply(_topic_slugs)
df.head()
| name | description | url | votesCount | createdAt | tagline | commentsCount | comments | topics | Date | day | topic_list | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Not Diamond | Not Diamond isn’t like other chatbots you’ve u... | https://www.producthunt.com/posts/not-diamond?... | 691 | 2024-08-01T07:01:00Z | The last chatbot you’ll ever need | 219 | {'nodes': [{'body': 'Hey Product Hunt!\r\n\r\n... | {'nodes': [{'slug': 'developer-tools'}, {'slug... | 2024-08-01 | Thursday | [developer-tools, artificial-intelligence, bots] |
| 1 | Clarity | Clarity is purpose-built for founder-led sales... | https://www.producthunt.com/posts/clarity-9f37... | 545 | 2024-08-01T07:01:00Z | A meeting recorder for founder-led sales | 199 | {'nodes': [{'body': "Congrats on the launch Au... | {'nodes': [{'slug': 'productivity'}, {'slug': ... | 2024-08-01 | Thursday | [productivity, sales, artificial-intelligence] |
| 2 | Mito Health | Mito Health uses blood work at regular labs to... | https://www.producthunt.com/posts/mito-health?... | 358 | 2024-08-01T07:01:00Z | Better insights from bloodwork | 117 | {'nodes': [{'body': 'Hi Product Hunt! 👋 \n\nWe... | {'nodes': [{'slug': 'health-fitness'}, {'slug'... | 2024-08-01 | Thursday | [health-fitness, artificial-intelligence, life... |
| 3 | EduWiz.AI | Improve your writing effortlessly with EduWiz.... | https://www.producthunt.com/posts/eduwiz-ai?ut... | 292 | 2024-08-01T07:01:00Z | Write magical paperwork in seconds with AI | 105 | {'nodes': [{'body': 'Hey, Product Hunt! 👋\n\nI... | {'nodes': [{'slug': 'writing'}, {'slug': 'educ... | 2024-08-01 | Thursday | [writing, education, artificial-intelligence] |
| 4 | Mind Visuals | Stay in your creator zone and edit videos in s... | https://www.producthunt.com/posts/mind-visuals... | 278 | 2024-08-01T07:01:00Z | Drag and drop animations for creators | 61 | {'nodes': [{'body': 'Mind Visuals is now live!... | {'nodes': [{'slug': 'design-tools'}, {'slug': ... | 2024-08-01 | Thursday | [design-tools, marketing, video] |
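Generally, anything above 200 upvotes on Product Hunt has a chance of becoming Product of the Day, and 200-300 upvotes is considered a good launch. Note that the categorisation used for the model below applies lower cut-offs: more than 150 votes counts as 'Success' and more than 50 as 'Average'.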
df[df['votesCount']>200].shape
(239, 12)
def categorize_votes(vote, success_threshold=150, average_threshold=50):
    """Bucket a vote count into a launch-outcome label.

    Args:
        vote: Number of upvotes a launch received.
        success_threshold: Votes strictly above this value are labelled
            'Success'. Defaults to 150.
        average_threshold: Votes strictly above this value (and not above
            ``success_threshold``) are labelled 'Average'. Defaults to 50.

    Returns:
        'Success', 'Average' or 'low'. Both comparisons are strict, so a value
        exactly equal to a threshold falls into the lower bucket.
    """
    if vote > success_threshold:
        return 'Success'
    if vote > average_threshold:
        return 'Average'
    return 'low'
# Label every launch with its outcome bucket.
df['vote_category'] = df['votesCount'].apply(categorize_votes)

# The CSV holds each post's comments as the text form of a dict, so parse every
# cell exactly once (instead of once for the emptiness check and again for the
# lookup) and iterate the column directly rather than by integer label.
comment_nodes = [ast.literal_eval(raw)['nodes'] for raw in df['comments']]

# First comment body per post; None when the post has no comments.
first_comments = [nodes[0]['body'] if nodes else None for nodes in comment_nodes]
df['first_comments'] = first_comments

# Character length of that first comment; 0 when it is missing or empty.
df['first_comments_len'] = [len(body) if body else 0 for body in first_comments]
ast.literal_eval(df['topics'][0])['nodes']
[{'slug': 'developer-tools'},
{'slug': 'artificial-intelligence'},
{'slug': 'bots'}]
df[['votesCount','commentsCount','first_comments_len']].corr()
| votesCount | commentsCount | first_comments_len | |
|---|---|---|---|
| votesCount | 1.000000 | 0.701128 | 0.246992 |
| commentsCount | 0.701128 | 1.000000 | 0.298725 |
| first_comments_len | 0.246992 | 0.298725 | 1.000000 |
# Count the distinct topic slugs across all posts. The outer quotes differ from
# the ones used inside the braces because reusing the same quote character
# inside an f-string is a SyntaxError on Python versions before 3.12.
f"Number of topics: {df['topic_list'].explode().nunique()}"
'Number of topics: 295'
# One-hot encode the per-post topic lists: one 0/1 column per distinct slug.
mlb = MultiLabelBinarizer()
topics_one_hot = mlb.fit_transform(df['topic_list'])

# Description length (characters) for the most-voted posts ...
plotly_graphs('histogram',
              x=[len(text) for text in top_posts_df['description']],
              title='Length of description of Top Posts')
# ... and for the least-voted posts. Iterate last_posts_df itself rather than
# range(len(top_posts_df)), and title the chart after the data it shows.
plotly_graphs('histogram',
              x=[len(text) for text in last_posts_df['description']],
              title='Length of description of Last Posts')

# Give the one-hot frame df's own index so the column-wise concat lines rows up
# even if df does not carry a default 0..n-1 index.
topics_one_hot_df = pd.DataFrame(topics_one_hot, columns=mlb.classes_, index=df.index)
df = pd.concat([df, topics_one_hot_df], axis=1)
df.head()
| name | description | url | votesCount | createdAt | tagline | commentsCount | comments | topics | Date | ... | weather | web-app | web-design | web3 | website-builder | wi-fi | word-games | wordpress | writing | youtube | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Not Diamond | Not Diamond isn’t like other chatbots you’ve u... | https://www.producthunt.com/posts/not-diamond?... | 691 | 2024-08-01T07:01:00Z | The last chatbot you’ll ever need | 219 | {'nodes': [{'body': 'Hey Product Hunt!\r\n\r\n... | {'nodes': [{'slug': 'developer-tools'}, {'slug... | 2024-08-01 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | Clarity | Clarity is purpose-built for founder-led sales... | https://www.producthunt.com/posts/clarity-9f37... | 545 | 2024-08-01T07:01:00Z | A meeting recorder for founder-led sales | 199 | {'nodes': [{'body': "Congrats on the launch Au... | {'nodes': [{'slug': 'productivity'}, {'slug': ... | 2024-08-01 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | Mito Health | Mito Health uses blood work at regular labs to... | https://www.producthunt.com/posts/mito-health?... | 358 | 2024-08-01T07:01:00Z | Better insights from bloodwork | 117 | {'nodes': [{'body': 'Hi Product Hunt! 👋 \n\nWe... | {'nodes': [{'slug': 'health-fitness'}, {'slug'... | 2024-08-01 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | EduWiz.AI | Improve your writing effortlessly with EduWiz.... | https://www.producthunt.com/posts/eduwiz-ai?ut... | 292 | 2024-08-01T07:01:00Z | Write magical paperwork in seconds with AI | 105 | {'nodes': [{'body': 'Hey, Product Hunt! 👋\n\nI... | {'nodes': [{'slug': 'writing'}, {'slug': 'educ... | 2024-08-01 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
| 4 | Mind Visuals | Stay in your creator zone and edit videos in s... | https://www.producthunt.com/posts/mind-visuals... | 278 | 2024-08-01T07:01:00Z | Drag and drop animations for creators | 61 | {'nodes': [{'body': 'Mind Visuals is now live!... | {'nodes': [{'slug': 'design-tools'}, {'slug': ... | 2024-08-01 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 rows × 310 columns
# Weekday name -> 1..7, Monday first.
weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                 'Friday', 'Saturday', 'Sunday']
daytoint = {name: number for number, name in enumerate(weekday_names, start=1)}

# Outcome label -> integer class id.
vote_category_dict = dict(zip(['low', 'Average', 'Success'], range(3)))

df['daytoint'] = df['day'].map(daytoint)
df['description_len'] = df['description'].apply(len)

# Features: every non-object column except votesCount (the label is derived
# from it) and commentsCount.
object_columns = [column for column, dtype in df.dtypes.items() if dtype == object]
x = df.drop(columns=object_columns + ['votesCount', 'commentsCount'])
y = df['vote_category']
x.shape, y.shape
((1860, 298), (1860,))
# Encode the labels once, then hold out 20% for testing. Stratify on the label
# so the smaller classes keep their share in both splits, and fix the seed so
# the split (and every metric computed from it) is reproducible between runs.
y_encoded = y.map(vote_category_dict)
x_train, x_test, y_train, y_test = train_test_split(
    x, y_encoded, test_size=0.2, stratify=y_encoded, random_state=42
)
x_train.shape, y_train.shape, x_test.shape, y_test.shape
((1488, 298), (1488,), (372, 298), (372,))
y.map(vote_category_dict).value_counts()
vote_category 0 1060 1 458 2 342 Name: count, dtype: int64
# Small boosted-tree model: depth 3, at most 4 leaves per tree, learning rate 0.1.
xgb_params = {'max_depth': 3, 'max_leaves': 4, 'learning_rate': 0.1}
xgb = XGBClassifier(**xgb_params)
xgb.fit(x_train, y_train)
XGBClassifier(base_score=None, booster=None, callbacks=None,
colsample_bylevel=None, colsample_bynode=None,
colsample_bytree=None, device=None, early_stopping_rounds=None,
enable_categorical=False, eval_metric=None, feature_types=None,
gamma=None, grow_policy=None, importance_type=None,
interaction_constraints=None, learning_rate=0.1, max_bin=None,
max_cat_threshold=None, max_cat_to_onehot=None,
max_delta_step=None, max_depth=3, max_leaves=4,
min_child_weight=None, missing=nan, monotone_constraints=None,
multi_strategy=None, n_estimators=None, n_jobs=None,
num_parallel_tree=None, objective='multi:softprob', ...)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
XGBClassifier(base_score=None, booster=None, callbacks=None,
colsample_bylevel=None, colsample_bynode=None,
colsample_bytree=None, device=None, early_stopping_rounds=None,
enable_categorical=False, eval_metric=None, feature_types=None,
gamma=None, grow_policy=None, importance_type=None,
interaction_constraints=None, learning_rate=0.1, max_bin=None,
max_cat_threshold=None, max_cat_to_onehot=None,
max_delta_step=None, max_depth=3, max_leaves=4,
min_child_weight=None, missing=nan, monotone_constraints=None,
multi_strategy=None, n_estimators=None, n_jobs=None,
num_parallel_tree=None, objective='multi:softprob', ...)print(f'Training Accuracy: {accuracy_score(xgb.predict(x_train),y_train)}')
# scikit-learn metrics take (y_true, y_pred). With the true labels first, rows
# of the matrix are the actual classes and columns the predicted ones.
print(f'Training Confusion matrix: {confusion_matrix(y_train, xgb.predict(x_train))}')
Training Accuracy: 0.6458333333333334 Training Confusion matrix: [[802 262 178] [ 18 84 19] [ 24 26 75]]
# Predict once and reuse the result. Metrics are called as (y_true, y_pred) so
# the confusion matrix has actual classes on rows and predictions on columns.
test_predictions = xgb.predict(x_test)
print(f'Test Accuracy: {accuracy_score(y_test, test_predictions)}')
print(f'Test Confusion matrix: {confusion_matrix(y_test, test_predictions)}')
Test Accuracy: 0.5860215053763441 Test Confusion matrix: [[203 76 57] [ 7 5 3] [ 6 5 10]]
# True labels go first: passing the predictions as y_true swaps precision with
# recall and reports the predicted class counts as "support".
print(classification_report(y_test, xgb.predict(x_test)))
precision recall f1-score support
0 0.94 0.60 0.74 336
1 0.06 0.33 0.10 15
2 0.14 0.48 0.22 21
accuracy 0.59 372
macro avg 0.38 0.47 0.35 372
weighted avg 0.86 0.59 0.68 372